package reptile;

import java.io.*;
import java.net.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small crawler demo: downloads one web page and prints every absolute
 * ({@code http}/{@code https}) link found in the {@code href} attribute of
 * its {@code <a>} tags, one link per output line.
 */
public class URLDemo {

    /**
     * Matches the opening part of an anchor tag up to and including its quoted
     * {@code href} value. Group 1 holds a double-quoted value, group 2 a
     * single-quoted one. The {@code href} must be preceded by whitespace so that
     * attributes such as {@code data-href} are not mistaken for it.
     */
    private static final Pattern ANCHOR_HREF = Pattern.compile(
            "<a\\s+(?:[^>]*?\\s)?href\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)')",
            Pattern.CASE_INSENSITIVE);

    /**
     * Fetches the test page, scans it line by line and prints each absolute link.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String testUrl = "https://fanyi.baidu.com/#zh/en/";
        try {
            URL url = new URL(testUrl);
            // Open a connection to the page through the URL.
            URLConnection conn = url.openConnection();

            // Page data is read and analysed line by line, so the byte stream is
            // wrapped in a buffered character reader with an explicit charset.
            // try-with-resources closes the reader (and the underlying stream)
            // even when reading fails part-way through.
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(conn.getInputStream(), StandardCharsets.UTF_8))) {
                // Content-Encoding response header (e.g. "gzip"); this is not the
                // character set and is null when the server did not send it.
                System.out.println(conn.getContentEncoding());

                String line;
                while ((line = br.readLine()) != null) {
                    for (String href : extractHttpLinks(line)) {
                        System.out.println(href);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts the absolute links from the anchor tags contained in one line of HTML.
     *
     * <p>Every {@code <a ... href="...">} (or single-quoted) occurrence in the line
     * is examined, so several links on the same line are all reported. Anchors
     * without a quoted {@code href} are skipped instead of raising an exception.
     *
     * @param line a single line of HTML; must not be {@code null}
     * @return the {@code href} values that start with {@code "http"}, in order of
     *         appearance; empty when the line contains none
     */
    static List<String> extractHttpLinks(String line) {
        List<String> links = new ArrayList<>();
        Matcher m = ANCHOR_HREF.matcher(line);
        while (m.find()) {
            // Exactly one of the two groups participates in a successful match.
            String href = m.group(1) != null ? m.group(1) : m.group(2);
            if (href.startsWith("http")) {
                links.add(href);
            }
        }
        return links;
    }
}
